import time
import scrapy
from douban.items import DoubanItem

class MoveSpider(scrapy.Spider):
    """Spider that walks the Douban movie Top 250 list page by page.

    For every movie entry found on a page it yields one ``DoubanItem`` with
    the ``name``, ``info``, ``introduce`` and ``score`` fields filled in, then
    follows the "next page" link until there is none.
    """

    name = "move"
    # Domains the spider is allowed to crawl.
    allowed_domains = ["douban.com"]
    # First page to request.
    start_urls = ["https://movie.douban.com/top250"]
    # Throttle requests through the downloader instead of calling
    # time.sleep() inside a callback, which would block the Twisted reactor
    # and stall every other in-flight request and pipeline.
    custom_settings = {"DOWNLOAD_DELAY": 3}

    def parse(self, response):
        """Extract the movies listed on one page and follow pagination.

        Args:
            response: The downloaded list page.

        Yields:
            One ``DoubanItem`` per movie entry, followed by a
            ``scrapy.Request`` for the next page when a next link exists.
        """
        node_list = response.xpath('//div[@class="article"]/ol/li//div[@class="info"]')
        for li in node_list:
            # Build a fresh item per movie: reusing a single instance would
            # make every yielded item alias the same object, so later
            # iterations overwrite data still held by pipelines/exporters.
            item = DoubanItem()
            item['name'] = li.xpath('./div[@class="hd"]/a/span[1]/text()').extract_first()

            # extract_first() returns None when the node is missing; only
            # strip spaces/newlines when there is text to clean.
            info = li.xpath('./div[@class="bd"]/p[1]/text()').extract_first()
            if info is not None:
                info = info.replace(' ', '').replace('\n', '')
            item['info'] = info

            item['introduce'] = li.xpath('./div[@class="bd"]/p[@class="quote"]/span/text()').extract_first()
            item['score'] = li.xpath('./div[@class="bd"]/div/span[@class="rating_num"]/text()').extract_first()
            yield item

        # Address of the next page; absent on the last page, which ends the crawl.
        url = response.xpath('//span[@class="next"]/a/@href').extract_first()
        if url is not None:
            # The href is relative, so resolve it against the current page URL.
            yield scrapy.Request(url=response.urljoin(url), callback=self.parse)
